# Load libraries and other custom code.
library(easypackages)
libraries("here","ggplot2","ggridges","reshape2","patchwork","RColorBrewer",
"plotrix","tidyverse","cluster","ggpackets","wordcloud","proxy",
"ggeasy","NbClust","MASS","robustbase","MKinfer","impute","ggheatmap",
"janitor", "lme4", "lmtest","matlabr","psych")
options(matlab.path = "/Applications/MATLAB_R2021b.app/bin")

# Project paths and shared helper code
codepath = here("code")
resultpath = here("results")
plot_path = here("plots")
source(file.path(codepath,"utils.R"))

# Global analysis parameters
plot_title_name = "IIT"   # site label used in plot titles
WEIGHT = "none"           # weighting scheme embedded in result filenames
nstimuli = 700            # total number of stimuli
fdr_thresh = 0.05         # FDR significance threshold
nperm = 1000              # number of permutations

# Stimulus names ("stim001", ...) and ids ("1001", ...).
# sprintf is vectorized, so no explicit loop is needed.
stim_names = sprintf("stim%03d", seq_len(nstimuli))
stim_ids = sprintf("1%03d", seq_len(nstimuli))
# Read in the semantic feature files that tell us which features go along
# with which stimuli, then merge the custom features into one annotation table.
semantic_features_annot = read.csv(file.path(codepath, "semantic_features.csv"))
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]
semantic_features_annot2 = read.csv(file.path(codepath, "semantic_features_custom.csv"))
semantic_features2 = colnames(semantic_features_annot2)[2:ncol(semantic_features_annot2)]
semantic_features_annot = cbind(semantic_features_annot,
                                semantic_features_annot2[,semantic_features2])
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]

# Derive extra categories from combinations of existing ones:
# social = human AND face; nonsocial = neither human nor animal.
is_social = semantic_features_annot[,c("face")]==1 & semantic_features_annot[,c("human")]==1
is_nonsocial = semantic_features_annot[,c("human")]==0 & semantic_features_annot[,c("animal")]==0
semantic_features_annot$social = (is_social)*1
semantic_features_annot$nonsocial = (is_nonsocial)*1
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]

# For each semantic category, collect the stimulus ids that carry it
stim_masks = list()
for (sem_feat in semantic_features){
  has_feature = semantic_features_annot[,sem_feat]==1
  stim_masks[[sem_feat]] = as.character(semantic_features_annot$stimulus[has_feature])
}
# Classifier output [0,1], subjects BY stimuli
fname = file.path(resultpath, sprintf("weight_%s_classification_accuracy_perSubStim.csv",WEIGHT))
classifier_output_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(classifier_output_sub_by_stim) = stim_ids

# For each subject, record the stimulus ids on which that subject was
# correctly fingerprinted (classifier output == 1).
sublist = rownames(classifier_output_sub_by_stim)
fp_masks = list()
for (subid in sublist){
  hit_mask = classifier_output_sub_by_stim[subid,]==1
  fp_masks[[subid]] = colnames(classifier_output_sub_by_stim)[hit_mask]
}
# classifier output [0,1] stimuli BY perm
fname = file.path(resultpath, sprintf("weight_%s_classification_accuracy_perStim_perm.csv",WEIGHT))
classifier_output_stim_by_perm = read.csv(fname, row.names=1, na.strings = "NaN")
# fingerprint ratios subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_fingerprintratios_perSubStim.csv",WEIGHT))
fpratio_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(fpratio_sub_by_stim) = stim_ids
# mean fingerprint ratios stimulus by perm
fname = file.path(resultpath, sprintf("weight_%s_mean_fingerprintratios_perStim_perm.csv",WEIGHT))
mean_fpratio_stim_by_perm = read.csv(fname, row.names=1, na.strings = "NaN")
# intrasubject correlation subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_intrasubjectR_perSubStim.csv",WEIGHT))
intrasubR_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(intrasubR_sub_by_stim) = stim_ids
# mean intersubject correlation subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_mean_intersubjectR_perSubStim.csv",WEIGHT))
mean_intersubR_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(mean_intersubR_sub_by_stim) = stim_ids
# nfingerprintable stimuli per subject
fname = file.path(resultpath, sprintf("weight_%s_classification_nfingerprintableStimuli_perSub.csv",WEIGHT))
nfpstim_per_subject_original = read.csv(fname, row.names=1, na.strings = "NaN")
# Weight nfp stimuli by the missing values in the intra correlation matrix.
# Note: despite the names, missing_tot counts COMPLETE (non-NA) observations
# and missing_ratio is the proportion of complete observations.
# Use the nstimuli parameter instead of the hard-coded 700.
nfpstim_per_subject_original$missing_tot <- nstimuli - rowSums(is.na(intrasubR_sub_by_stim)) # number of complete obs per subject
nfpstim_per_subject_original$missing_ratio <- (nfpstim_per_subject_original$missing_tot/nstimuli) # proportion of complete obs
# Scale the fingerprintable-stimuli count by the completeness ratio; this
# down-weights participants with missing data.
nfpstim_per_subject_original <- nfpstim_per_subject_original %>% mutate(fpstimuli_ratioed = (round(missing_ratio*nfingerprintable_stimuli, 0)))
# Create a new data frame that contains only the weighted number of fp stimuli
nfpstim_per_subject <- data.frame(nfpstim_per_subject_original$fpstimuli_ratioed)
rownames(nfpstim_per_subject) <- rownames(nfpstim_per_subject_original)
nfpstim_per_subject <- nfpstim_per_subject %>% rename(nfingerprintable_stimuli = nfpstim_per_subject_original.fpstimuli_ratioed)
# nfingerprintable stimuli subject by perm, weighted the same way
fname = file.path(resultpath, sprintf("weight_%s_classification_nfingerprintableStimuli_perSub_perm.csv",WEIGHT))
nfpstim_sub_by_perm_original = read.csv(fname, row.names=1, na.strings = "NaN")
nfpstim_sub_by_perm = round(nfpstim_sub_by_perm_original*nfpstim_per_subject_original$missing_ratio, 0)
# Read in phenotypic data and exclude subjects with too many missing stimuli
# (> 15% NA in the intrasubject correlation matrix). Keep only subjects that
# are present in the eye-tracking data and not excluded.
pheno_data = read.csv(here("data","pheno","pheno_OSIE.csv"))
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)  # use nstimuli, not hard-coded 700
subs2exclude = rownames(intrasubR_sub_by_stim)[na_mask]
mask1 = is.element(pheno_data$subj_ID,rownames(intrasubR_sub_by_stim))
mask2 = !is.element(pheno_data$subj_ID, subs2exclude)
mask = mask1 & mask2
pheno_data_sub = pheno_data %>% filter(mask)
# Descriptive statistics for the retained sample (psych::describe)
describe(pheno_data_sub)
## vars n mean sd median trimmed mad min max
## subj_ID* 1 105 53.00 30.45 53.00 53.00 38.55 1.00 105.00
## sex* 2 105 1.50 0.50 2.00 1.51 0.00 1.00 2.00
## age 3 105 26.21 6.30 25.00 25.34 4.45 18.00 50.00
## IQ 4 91 108.65 13.09 110.00 108.99 10.38 74.50 138.00
## test_retest_delay 5 105 7.33 1.46 7.00 7.00 0.00 4.00 16.00
## is_imagelist_shared 6 105 0.22 0.42 0.00 0.15 0.00 0.00 1.00
## S1_CalError_OSIE 7 105 0.48 0.30 0.42 0.44 0.17 0.11 2.59
## S2_CalError_OSIE 8 105 0.48 0.22 0.43 0.45 0.17 0.11 1.38
## Diff_CalError_OSIE 9 105 0.20 0.24 0.14 0.16 0.11 0.00 2.07
## AQ_Tot 10 105 16.92 6.72 16.00 16.55 4.45 4.00 38.00
## AQ_Soc 11 105 3.14 3.05 2.00 2.67 1.48 0.00 13.00
## AQ_Comm 12 105 1.66 1.59 1.00 1.46 1.48 0.00 8.00
## AQ_AttDet 13 105 3.46 1.92 3.00 3.41 2.97 0.00 7.00
## SRS_Tot_Raw 14 105 45.16 22.77 42.00 43.28 19.27 9.00 151.00
## SRS_DSM5_SOC 15 105 26.18 13.51 23.00 25.00 11.86 5.00 83.00
## SRS_DSM5_RRB 16 105 18.98 10.66 18.00 18.02 10.38 2.00 68.00
## SRS_Emp_ER 17 105 18.11 8.71 17.00 17.59 7.41 3.00 46.00
## SRS_Emp_Avoid 18 105 4.99 3.47 4.00 4.59 2.97 0.00 21.00
## SRS_Emp_IR 19 105 3.08 2.79 2.00 2.66 1.48 0.00 16.00
## SRS_Emp_Same 20 105 14.28 7.66 13.00 13.56 7.41 1.00 46.00
## SRS_Emp_RM 21 105 4.70 3.62 4.00 4.34 2.97 0.00 22.00
## SQ_Tot 22 98 56.43 17.52 53.50 55.23 14.83 22.00 114.00
## range skew kurtosis se
## subj_ID* 104.00 0.00 -1.23 2.97
## sex* 1.00 -0.02 -2.02 0.05
## age 32.00 1.41 2.09 0.62
## IQ 63.50 -0.32 -0.15 1.37
## test_retest_delay 12.00 3.76 16.15 0.14
## is_imagelist_shared 1.00 1.34 -0.21 0.04
## S1_CalError_OSIE 2.47 3.77 22.93 0.03
## S2_CalError_OSIE 1.28 1.31 2.13 0.02
## Diff_CalError_OSIE 2.07 4.79 31.75 0.02
## AQ_Tot 34.00 0.76 1.14 0.66
## AQ_Soc 13.00 1.33 1.22 0.30
## AQ_Comm 8.00 1.30 1.95 0.16
## AQ_AttDet 7.00 0.12 -1.00 0.19
## SRS_Tot_Raw 142.00 1.37 3.74 2.22
## SRS_DSM5_SOC 78.00 1.20 2.41 1.32
## SRS_DSM5_RRB 66.00 1.26 3.16 1.04
## SRS_Emp_ER 43.00 0.72 0.54 0.85
## SRS_Emp_Avoid 21.00 1.39 3.25 0.34
## SRS_Emp_IR 16.00 1.90 4.77 0.27
## SRS_Emp_Same 45.00 1.10 2.06 0.75
## SRS_Emp_RM 22.00 1.37 3.65 0.35
## SQ_Tot 92.00 0.76 0.41 1.77
# Sex distribution of the retained sample
table(pheno_data_sub$sex)
##
## F M
## 52 53
# Assemble a single MATLAB command string (run via matlabr::run_matlab_code)
# that post-processes the gaze identification matrices saved in
# weight_none_gsa_res.mat and writes them out as CSVs for R to read.
# Each sprintf appends one ';'-separated MATLAB statement to code2run.
# cd into the code directory and point MATLAB at the results directory
code2run = sprintf("cd %s", codepath)
code2run = sprintf("%s; resultpath = '%s'",code2run, resultpath)
# load gsa_res (contains id_mat: subject x subject x stimulus, and subids)
code2run = sprintf("%s; load('%s')",code2run, file.path(resultpath, "weight_none_gsa_res.mat"))
# Symmetrize each subject-by-subject slice of id_mat (mirror the upper
# triangle into the lower), then take the per-subject median across all
# stimuli and write it as weight_none_id_mat_all.csv.
code2run = sprintf("%s; id_mat = gsa_res.id_mat; id_mat_symm = id_mat; tmp_mat = squeeze(id_mat(:,:,1)); mask = tril(tmp_mat)~=0; for istim = 1:size(id_mat,3); tmp_mat = squeeze(id_mat(:,:,istim)); tmp_mat_rev = tmp_mat'; tmp_mat2use = tmp_mat; tmp_mat2use(mask) = tmp_mat_rev(mask); id_mat_symm(:,:,istim) = tmp_mat2use; end; id_mat = id_mat_symm; tmp_ct = nan(size(id_mat,1),size(id_mat,2)); for isub = 1:size(id_mat,1); tmp_mat = squeeze(id_mat(isub,:,:))'; tmp_ct(isub,:) = nanmedian(tmp_mat,1); end; data2use = tmp_ct; tab2write = cell2table([gsa_res.subids, num2cell(data2use)], 'VariableNames',[{'subid'},gsa_res.subids']); file2write = fullfile(resultpath,'weight_none_id_mat_all.csv'); writetable(tab2write, file2write)
",code2run)
# Repeat the median computation restricted to each semantic feature's
# stimuli, writing one weight_none_id_mat_<feature>.csv per feature.
code2run = sprintf("%s; semantic_features = readtable('%s')", code2run, file.path(resultpath, "semantic_features.csv"))
code2run = sprintf("%s; features2use = semantic_features.Properties.VariableNames(3:end)", code2run)
code2run = sprintf("%s; for i=1:length(features2use); curr_feature = features2use{i}; disp(curr_feature); feature_mask = semantic_features.(curr_feature)==1; tmp_ct = nan(size(id_mat,1),size(id_mat,2)); for isub = 1:size(id_mat,1); tmp_mat = squeeze(id_mat(isub,:,feature_mask))'; tmp_ct(isub,:) = nanmedian(tmp_mat,1); end; data2use = tmp_ct; tab2write = cell2table([gsa_res.subids, num2cell(data2use)], 'VariableNames',[{'subid'},gsa_res.subids']); file2write = fullfile(resultpath,['weight_none_id_mat_',curr_feature,'.csv']); writetable(tab2write, file2write); end; exit", code2run)
# Execute the assembled MATLAB program (blocks until MATLAB exits)
res = run_matlab_code(code2run)
# heatmap of semantic feature matrix to see which semantic features are usually co-occuring
# Binary stimulus-by-feature matrix (drop the stimulus id column)
df_sem_feat = as.matrix(semantic_features_annot[,2:ncol(semantic_features_annot)])
# Clustering settings shared by both NbClust runs and the heatmap:
# Ward linkage on binary (Jaccard-style) distances, best k by silhouette.
clust_method = "ward.D2"
dist_method = "binary"
nbc_index = "silhouette"
# cluster stimuli (rows), letting NbClust pick the best k in 2..6
stim_feat_res = NbClust(data = as.matrix(df_sem_feat),
index = nbc_index,
distance = dist_method,
method = clust_method,
min.nc=2, max.nc=6)
k_stim = stim_feat_res$Best.nc
# One Set1 color per stimulus cluster, mapped onto each stimulus.
# NOTE(review): brewer.pal warns when asked for fewer than 3 colors
# (k here is 2 per the output below) but still returns a usable palette.
row_cols = brewer.pal(k_stim[[1]], "Set1")[stim_feat_res$Best.partition]
# add clustering solution to semantic_features_annot
cluster_df = data.frame(stimulus = factor(semantic_features_annot$stimulus),
cluster = factor(stim_feat_res$Best.partition))
semantic_features_annot = merge(cluster_df,semantic_features_annot[, c("stimulus",semantic_features)], by = "stimulus")
# cluster semantic features (columns) the same way, on the transposed matrix
sem_feat_res = NbClust(data = as.matrix(t(df_sem_feat)),
index = nbc_index,
distance = dist_method,
method = clust_method,
min.nc=2, max.nc=6)
k_sem_feat = sem_feat_res$Best.nc
col_cols = brewer.pal(k_sem_feat[[1]], "Set1")[sem_feat_res$Best.partition]
# make heatmap with the cluster memberships as row/column side colors
stim_hmap_res = heatmap(as.matrix(df_sem_feat),
hclustfun=function(d) hclust(d, method=clust_method),
scale = "none",
RowSideColors=row_cols,
ColSideColors=col_cols)
# Row (stimulus) ordering from the heatmap dendrogram
stim_order_dendrogram = stim_hmap_res$rowInd
stim_hmap_res
## $rowInd
## [1] 298 508 675 128 683 24 414 7 259 307 643 526 38 452 57 31 129 496
## [19] 208 76 54 19 73 264 37 594 9 82 122 635 622 551 507 375 214 175
## [37] 202 528 193 68 8 349 672 311 473 273 80 231 599 490 299 399 276 198
## [55] 564 404 377 123 12 97 598 515 174 424 519 645 166 87 337 179 699 625
## [73] 489 417 324 322 316 309 171 211 652 541 29 488 293 403 280 654 353 116
## [91] 314 591 655 476 266 134 230 471 323 251 245 199 92 91 5 16 487 659
## [109] 658 423 192 30 151 677 554 93 250 317 67 15 35 595 410 40 234 426
## [127] 262 416 644 315 21 43 272 137 49 1 26 687 664 653 607 559 557 523
## [145] 362 334 305 281 152 239 449 347 62 243 104 690 98 36 86 79 691 673
## [163] 556 548 499 384 105 109 604 671 651 458 363 328 209 120 153 638 624 383
## [181] 492 670 99 144 133 393 456 391 149 387 674 428 235 256 369 612 469 14
## [199] 275 520 678 400 589 581 65 118 642 306 371 665 660 534 634 143 667 381
## [217] 210 247 288 516 656 270 44 246 696 17 447 646 22 165 356 374 341 90
## [235] 390 279 61 150 662 558 261 457 578 613 583 350 479 52 530 700 682 420
## [253] 146 163 265 623 605 584 572 411 402 348 228 115 85 3 72 681 666 112
## [271] 318 81 278 629 609 582 346 302 289 25 84 514 320 453 693 58 639 69
## [289] 248 689 533 330 292 41 139 597 335 697 333 425 686 577 465 232 359 148
## [307] 552 434 477 161 580 373 60 71 310 421 63 100 661 332 647 257 611 159
## [325] 140 561 506 329 180 45 110 200 154 540 370 562 650 549 167 537 531 444
## [343] 338 282 156 147 354 158 679 585 368 263 698 503 4 379 602 657 430 569
## [361] 441 626 429 475 188 297 119 177 627 88 186 521 74 249 380 252 269 633
## [379] 603 482 408 191 244 571 542 497 450 419 212 83 203 308 355 344 205 481
## [397] 361 593 445 461 553 610 227 178 187 409 576 398 543 418 570 685 529 692
## [415] 579 459 588 586 493 467 451 385 366 378 242 285 641 46 164 630 592 454
## [433] 241 224 184 27 172 535 443 422 219 47 55 640 460 213 206 94 101 437
## [451] 201 33 225 486 223 621 412 494 518 168 466 560 357 616 319 448 107 325
## [469] 608 614 401 547 326 303 301 59 233 573 483 238 162 131 114 121 480 204
## [487] 536 505 512 555 485 498 550 501 502 565 382 649 510 491 440 389 395 48
## [505] 70 376 522 567 406 468 255 260 436 364 258 226 56 194 296 511 439 539
## [523] 372 394 669 78 217 396 504 221 397 111 196 351 185 50 222 636 606 600
## [541] 566 478 474 435 343 294 220 197 176 155 10 18 574 431 283 32 267 568
## [559] 253 271 313 618 601 538 53 321 500 632 495 472 470 463 438 427 415 342
## [577] 284 268 181 13 160 587 290 173 237 102 563 336 513 631 388 527 77 525
## [595] 2 545 637 6 215 145 157 64 95 170 11 130 620 517 433 89 358 345
## [613] 142 339 446 442 125 386 340 432 124 367 464 141 182 254 407 405 676 195
## [631] 286 277 392 413 546 240 103 138 695 236 229 66 51 360 189 365 617 615
## [649] 663 352 532 484 509 619 304 42 596 108 218 524 127 327 680 291 300 668
## [667] 106 113 96 117 688 648 455 183 287 590 544 190 216 462 207 136 331 132
## [685] 575 694 34 684 126 295 628 23 39 312 135 169 75 274 28 20
##
## $colInd
## [1] 1 10 14 3 4 5 7 9 16 6 8 12 11 2 15 13
##
## $Rowv
## NULL
##
## $Colv
## NULL
# Map each stimulus cluster label to its display color and tabulate sizes
cluster_res = data.frame(clust_labels = stim_feat_res$Best.partition, clust_colors = row_cols)
ctab = as.data.frame(table(cluster_res$clust_labels))
for (i in seq_len(k_stim[[1]])){  # seq_len is safe even if k were 0
  tmp_col = as.character(unique(cluster_res$clust_colors[cluster_res$clust_labels==i]))
  ctab$color[i] = plotrix::color.id(tmp_col)  # human-readable color name
  ctab$clust_num[i] = unique(cluster_res$clust_labels[cluster_res$clust_labels==i])
}
ctab
## Var1 Freq color clust_num
## 1 1 158 firebrick2 1
## 2 2 542 steelblue 2
# Per-subject count of fingerprintable stimuli within each stimulus cluster.
# NOTE(review): this assumes cluster_res rows are in the same stimulus order
# as the columns of classifier_output_sub_by_stim — confirm.
nfpstim_per_subject$c1 = rowSums(classifier_output_sub_by_stim[,cluster_res$clust_labels==1])
nfpstim_per_subject$c2 = rowSums(classifier_output_sub_by_stim[,cluster_res$clust_labels==2])
# get sum of stimuli within semantic clusters and plot as a word cloud
sem_feat_cols = unique(col_cols)
stim_cols = unique(row_cols)
# function to do gaze fingerprint classification
#
# Reads a subject-by-subject similarity matrix from `file2use`, drops the
# subjects flagged in `na_mask`, and scores each subject as correctly
# "fingerprinted" when the maximum of their row lies on the diagonal.
# Optionally builds a permutation null by shuffling the row order.
#
# Args:
#   file2use: path to a CSV with subject ids as row names; cell [i, j] holds
#             the similarity between subject i and subject j.
#   na_mask:  logical vector marking subjects to exclude (too many NAs).
#   nperm:    number of permutations for the null distribution; 0 skips it.
#
# Returns: 1-row data.frame with columns accuracy, mean_null, pval
#          (mean_null and pval are NA when nperm == 0).
gaze_fingerprint_classifier <- function(file2use, na_mask, nperm=0){
  tmp_data = read.csv(file2use, row.names=1)
  tmp_data = tmp_data[!na_mask,!na_mask]

  # Proportion of rows whose maximum falls on the diagonal.
  # which.max takes the first maximum on ties and skips NAs, so tied or
  # partially-missing rows no longer error out the way
  # `which(x == max(x))` did (length > 1 assignment / NA propagation).
  accuracy_of <- function(mat){
    hits = vapply(seq_len(nrow(mat)), function(isub){
      best = which.max(as.numeric(mat[isub,]))
      length(best) == 1 && best == isub  # all-NA row counts as a miss
    }, logical(1))
    sum(hits)/nrow(mat)
  }

  result = data.frame(accuracy = accuracy_of(tmp_data),
                      mean_null = NA_real_,
                      pval = NA_real_)

  if (nperm>0){
    perm_accuracies = numeric(nperm)  # preallocate the null distribution
    subids = rownames(tmp_data)
    for (iperm in seq_len(nperm)){
      set.seed(iperm)  # reproducible permutation per iteration
      rand_perm = sample(length(subids))
      # shuffle rows only; columns keep their original order
      perm_accuracies[iperm] = accuracy_of(tmp_data[subids[rand_perm],])
    }
    # permutation p-value with the standard +1 correction
    result$pval = (sum(perm_accuracies>=result$accuracy)+1)/(nperm+1)
    result$mean_null = mean(perm_accuracies)
  }
  return(result)
} # function gaze_fingerprint_classifier
# subjects to remove because of too many NAs (> 15% of stimuli missing)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)  # use nstimuli, not hard-coded 700
# Run the fingerprint classifier on the full stimulus set ("all") and on
# every semantic feature subset, collecting accuracy and permutation stats.
features2use = c("all", semantic_features)
gfp_res = data.frame(matrix(nrow = length(features2use), ncol = 3))
colnames(gfp_res) = c("accuracy", "mean_null", "pval")
rownames(gfp_res) = features2use
for (ifeature in seq_along(features2use)){
  file2use = file.path(resultpath,sprintf("weight_%s_id_mat_%s.csv",WEIGHT, features2use[ifeature]))
  gfp_res[ifeature,] = gaze_fingerprint_classifier(file2use, na_mask, nperm=nperm)
}
gfp_res
## accuracy mean_null pval
## all 0.5047619 0.009571429 0.000999001
## text 0.5142857 0.009304762 0.000999001
## face 0.5047619 0.009352381 0.000999001
## emotion 0.4000000 0.009266667 0.000999001
## sound 0.2761905 0.009504762 0.000999001
## smell 0.4571429 0.009600000 0.000999001
## taste 0.5238095 0.009714286 0.000999001
## touch 0.4857143 0.009676190 0.000999001
## motion 0.4761905 0.009266667 0.000999001
## operability 0.4952381 0.009590476 0.000999001
## watchability 0.5238095 0.009314286 0.000999001
## touched 0.4857143 0.009209524 0.000999001
## gazed 0.4571429 0.009428571 0.000999001
## human 0.5047619 0.009542857 0.000999001
## animal 0.3904762 0.009647619 0.000999001
## social 0.5047619 0.009533333 0.000999001
## nonsocial 0.4857143 0.009257143 0.000999001
# Build the plotting table: one row per semantic feature; the "all" row is
# dropped because it is shown as a vertical reference line instead of a bar.
data2plot = gfp_res
data2plot$feature = rownames(gfp_res)
data2plot = data2plot %>% filter(!feature=="all")
data2plot$cluster = NA
# Semantic-feature cluster membership and its Set1 hex color, as a single
# lookup table instead of six copy-pasted mask/assign pairs.
feature_cluster_map = list(
  red    = list(hex = "#E41A1C", members = c("text","watchability")),
  blue   = list(hex = "#377EB8", members = c("human","touched","social","face","motion","gazed")),
  green  = list(hex = "#4DAF4A", members = c("emotion","sound")),
  purple = list(hex = "#984EA3", members = c("smell","operability","nonsocial","taste")),
  orange = list(hex = "#FF7F00", members = c("touch")),
  yellow = list(hex = "#FFFF33", members = c("animal"))
)
for (clust_name in names(feature_cluster_map)){
  clust_info = feature_cluster_map[[clust_name]]
  in_clust = is.element(data2plot$feature, clust_info$members)
  data2plot$cluster[in_clust] = clust_name
  data2plot$cluster_color[in_clust] = clust_info$hex
}
data2plot$cluster = factor(data2plot$cluster)
# Fixed display order of features (best-to-worst accuracy, reversed for ggplot)
data2plot$feature = factor(data2plot$feature, levels = rev(c("taste","watchability","text","face","human","social","operability","touch","touched","nonsocial","motion","smell","gazed","emotion","animal","sound")))
# load in bootstrap accuracy for all stimuli (95% CIs for the dashed lines)
bootaccall = read.csv(file.path(resultpath,"bootstrap_accuracy_95CIs.csv"))
# Bar plot of accuracy per semantic feature, colored by feature cluster,
# with the all-stimuli accuracy (solid) and its bootstrap 95% CI (dashed)
# as vertical reference lines.
p = ggplot(data = data2plot, aes(y = feature, x = accuracy, fill = cluster)) +
  geom_bar(stat = "identity") +
  geom_vline(xintercept=gfp_res["all","accuracy"]) +
  geom_vline(xintercept=bootaccall[1,"low95"], linetype = "longdash") +
  geom_vline(xintercept=bootaccall[1,"hi95"], linetype = "longdash") +
  scale_fill_manual(values = c("#377EB8","#4DAF4A","#FF7F00","#984EA3","#E41A1C","#FFFF33")) +
  ylab("Semantic Feature") + xlab("Accuracy") +
  coord_cartesian(xlim=c(0.25,0.80)) + guides(fill="none") +
  ggtitle(plot_title_name) + easy_center_title()
# BUG FIX: ggsave() defaults to plot = last_plot(), but p has not been
# printed yet at this point, so pass it explicitly to save the right plot.
ggsave(filename = file.path(plot_path, "fingerprint_accuracy_global_semantic_features.pdf"),
       plot = p, width = 4, height = 5)
p
# Word cloud of features, sized by accuracy^12 to exaggerate differences,
# colored by cluster (ordered.colors keeps the word/color pairing).
wordcloud(words = data2plot$feature,
          freq = data2plot$accuracy^12,
          random.order = FALSE,
          rot.per=0,
          colors=data2plot$cluster_color,
          ordered.colors=TRUE)
# Per-stimulus fingerprinting accuracy across retained subjects
df_res = classifier_output_sub_by_stim
df_res = df_res[!na_mask,]
df_res_perm = classifier_output_stim_by_perm
# NOTE(review): this overwrites the global nperm (set to 1000 at the top)
# with the number of permutation columns actually in the file; later blocks
# (e.g. the per-subject p-values) also use this value — confirm intended.
nperm = ncol(df_res_perm)
subs2use = rownames(df_res)
df2plot = data.frame(stim_names = stim_names,
stim_ids = stim_ids,
accuracy = colSums(df_res[,stim_ids])/nrow(df_res),
site="IIT")
df2plot$site = factor(df2plot$site)
# calculate p-values based on permutation accuracies (+1 correction);
# preallocate the column rather than growing it inside the loop
df2plot$pval = NA_real_
for (istim in seq_len(nstimuli)){
  df2plot$pval[istim] = (sum(df_res_perm[istim,]>=df2plot$accuracy[istim])+1)/(nperm+1)
}
# calculate FDR and flag fingerprintable stimuli
df2plot$fdr = p.adjust(df2plot$pval, method = "fdr")
df2plot$fingerprint = "No"
df2plot$fingerprint[df2plot$fdr<=fdr_thresh] = "Yes"
# attach semantic annotations and the stimulus cluster labels (C1/C2)
df2plot = merge(df2plot, semantic_features_annot, by.x = "stim_ids", by.y= "stimulus")
df2plot$stim_cluster_name = "C2"
df2plot$stim_cluster_name[df2plot$cluster==1] = "C1"
## Accuracy to fingerprintability plots
# make accuracy plot across all stimuli
p1 = ggplot(data = df2plot, aes(x = fingerprint, y = accuracy, colour=fingerprint)) +
geom_jitter(width=0.1, alpha = 0.5) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), fill=NA, colour="black") +
scale_colour_manual(values = c("orange","dodger blue")) + guides(colour="none") +
xlab("Fingerprintable") +
ylab("Accuracy") + ggtitle(plot_title_name) + easy_center_title()
# make accuracy plot, but color according to stimulus cluster
p2 = ggplot(data = df2plot, aes(x = stim_cluster_name, y = accuracy, colour=stim_cluster_name)) +
geom_jitter(width=0.1, alpha = 0.5) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), fill=NA, colour="black") +
guides(colour="none") + scale_colour_manual(values = c("#E41A1C","#377EB8")) +
xlab("Stimulus Cluster") +
ylab("Accuracy") + ggtitle(plot_title_name) + easy_center_title()
# combine side by side via patchwork
p_final = p1+p2
p_final
# tabulate number of stimuli where fingerprinting is possible or not
table(df2plot$fingerprint)
##
## No Yes
## 38 662
# what percentage of the 700 stimuli allow us to gaze fingerprint?
sum(df2plot$fingerprint=="Yes")/nstimuli
## [1] 0.9457143
# tabulate number of stimuli where fingerprinting is possible or not, by stimulus cluster
table(df2plot$fingerprint, df2plot$stim_cluster_name)
##
## C1 C2
## No 13 25
## Yes 145 517
# chi-square test on that fingerprint by stimulus cluster contingency table
chisq.test(table(df2plot$fingerprint, df2plot$stim_cluster_name))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(df2plot$fingerprint, df2plot$stim_cluster_name)
## X-squared = 2.4502, df = 1, p-value = 0.1175
# t-test comparing accuracy of C2 vs C1 stimuli
t.test(df2plot$accuracy[df2plot$stim_cluster_name=="C2"], df2plot$accuracy[df2plot$stim_cluster_name=="C1"])
##
## Welch Two Sample t-test
##
## data: df2plot$accuracy[df2plot$stim_cluster_name == "C2"] and df2plot$accuracy[df2plot$stim_cluster_name == "C1"]
## t = 4.3784, df = 292.24, p-value = 1.667e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.004113387 0.010830929
## sample estimates:
## mean of x mean of y
## 0.04707433 0.03960217
# Cohen's d effect size for accuracy of C2 vs C1 stimuli
# NOTE(review): cohens_d is not obviously provided by the packages loaded
# above (psych exports cohen.d) — presumably defined in utils.R; confirm.
dres = cohens_d(df2plot$accuracy[df2plot$stim_cluster_name=="C2"], df2plot$accuracy[df2plot$stim_cluster_name=="C1"]); dres
## [1] 0.3643683
# Accuracies across all semantic feature categories:
# for each feature, the mean/sd/sem of per-stimulus accuracy over the
# stimuli carrying that feature.
cols2use = c("feature","accuracy","std","sem")
result_df = data.frame(matrix(nrow = length(semantic_features), ncol = length(cols2use)))
rownames(result_df) = semantic_features
colnames(result_df) = cols2use
for (sem_feat in semantic_features){
  mask = semantic_features_annot[sem_feat]==1
  stims2use = stim_ids[mask]
  # compute per-stimulus accuracy once (was recomputed three times, and the
  # accuracy line inconsistently used df_res[stims2use] without the comma)
  stim_acc = colSums(df_res[,stims2use])/nrow(df_res)
  result_df[sem_feat,"feature"] = sem_feat
  result_df[sem_feat,"accuracy"] = mean(stim_acc)
  result_df[sem_feat,"std"] = sd(stim_acc)
  result_df[sem_feat,"sem"] = sd(stim_acc)/sqrt(sum(mask))
}
# Fingerprint Ratios across all semantic feature categories:
# per-subject mean fingerprint ratio over the stimuli carrying each feature.
cols2use = semantic_features
result_df_fpr = data.frame(matrix(nrow = dim(fpratio_sub_by_stim)[1], ncol = length(semantic_features)))
rownames(result_df_fpr) = rownames(fpratio_sub_by_stim)
colnames(result_df_fpr) = cols2use
for (sem_feat in semantic_features){
  mask = semantic_features_annot[sem_feat]==1
  stims2use = stim_ids[mask]
  # drop = FALSE keeps a one-column selection as a data frame so rowMeans
  # does not fail if a feature ever maps to a single stimulus
  result_df_fpr[,sem_feat] = rowMeans(fpratio_sub_by_stim[,stims2use, drop = FALSE], na.rm= TRUE)
}
result_df_fpr$subid = rownames(result_df_fpr)
# long format for plotting
df2plot = melt(result_df_fpr, id.vars = "subid")
# order features by accuracy for plotting, and attach their cluster labels
result_df$feature = with(result_df, reorder(feature, accuracy))
feature_clusters = data.frame(feature = names(sem_feat_res$Best.partition), cluster = sem_feat_res$Best.partition)
result_df = merge(result_df, feature_clusters, by = "feature")
result_df$cluster = factor(result_df$cluster)
result_df
## feature accuracy std sem cluster
## 1 animal 0.04551639 0.02063842 0.0016630911 6
## 2 emotion 0.04986395 0.02169390 0.0018334691 3
## 3 face 0.04716205 0.02112236 0.0009253872 2
## 4 gazed 0.04727891 0.02071741 0.0014798149 2
## 5 human 0.04765890 0.02113001 0.0009664643 2
## 6 motion 0.04523063 0.02033629 0.0011386137 2
## 7 nonsocial 0.03960217 0.01816334 0.0014449980 4
## 8 operability 0.04395238 0.01983713 0.0014026970 4
## 9 smell 0.04342038 0.02305053 0.0023902282 4
## 10 social 0.04780620 0.02108902 0.0009854252 2
## 11 sound 0.04668228 0.01753562 0.0022452057 3
## 12 taste 0.04554205 0.02312023 0.0016862164 4
## 13 text 0.04862563 0.02183029 0.0013918483 1
## 14 touch 0.04169628 0.01934604 0.0013645644 5
## 15 touched 0.04784736 0.02132391 0.0012478875 2
## 16 watchability 0.04617389 0.02091404 0.0010430976 1
# Bar plot of mean per-feature accuracy with SEM error bars, colored by
# the semantic-feature cluster (same Set1 palette as the heatmap columns).
p = ggplot(data = result_df, aes(y = feature, x = accuracy, fill = cluster)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(xmin=accuracy-sem, xmax=accuracy+sem), width=0.2) +
guides(fill="none") +
coord_cartesian(xlim = c(0.035, 0.09)) +
xlab("Accuracy") + ylab("Semantic Feature") +
scale_fill_manual(values = unique(col_cols)) + ggtitle(plot_title_name) + easy_center_title()
# BUG FIX: ggsave() defaults to plot = last_plot(), but p has not been
# printed yet at this point, so pass it explicitly to save the right plot.
ggsave(filename = file.path(plot_path, "fingerprint_accuracy_local_semantic_features.pdf"),
       plot = p, width = 4, height = 5)
p
# Per-subject fingerprintability: compare each subject's (weighted) number
# of fingerprintable stimuli against their permutation null distribution.
fp_res = nfpstim_per_subject
fp_res$subids = rownames(fp_res)
fp_perm_res = nfpstim_sub_by_perm
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)  # use nstimuli, not hard-coded 700
fp_res = fp_res %>% filter(!na_mask)
fp_perm_res = fp_perm_res %>% filter(!na_mask)
# figure out p-values for each subject based on permutation nfingerprintable
# stim (+1 correction).
# NOTE(review): nperm here is the value set earlier from
# ncol(classifier_output_stim_by_perm); confirm it equals ncol(fp_perm_res).
for (isub in seq_len(nrow(fp_res))){
  fp_res$pval[isub] = (sum(fp_perm_res[isub,]>=fp_res$nfingerprintable_stimuli[isub])+1)/(nperm+1)
}
fp_res$fdr = p.adjust(fp_res$pval, method = "fdr")
fp_res$Fingerprintable = "Yes"
fp_res$Fingerprintable[fp_res$fdr>fdr_thresh] = "No"
# label permutation rows by the corresponding subject's significance
fp_perm_res$Fingerprintable = "Yes"
fp_perm_res$Fingerprintable[fp_res$fdr>fdr_thresh] = "No"
# subjects who have significantly more nfingerprintable stimuli than expected by chance
fp_subs2include = fp_res$subids[fp_res$fdr<=fdr_thresh]
print(sprintf("%d subjects of %d with statistically significant number of fingerprintable stimuli",
length(fp_subs2include),
dim(fp_res)[1]))
## [1] "99 subjects of 105 with statistically significant number of fingerprintable stimuli"
# subjects whose nfingerprintable stimuli is no better than chance
fp_subs2exclude = fp_res$subids[fp_res$fdr>fdr_thresh]
print(sprintf("%d subjects of %d with non-statistically significant number of fingerprintable stimuli",
length(fp_subs2exclude),
dim(fp_res)[1]))
## [1] "6 subjects of 105 with non-statistically significant number of fingerprintable stimuli"
# Sort subjects ascending by their number of fingerprintable stimuli
reorder_vect = order(fp_res$nfingerprintable_stimuli)
ordered_fp_res = fp_res[reorder_vect,]
ordered_fp_res_subs = rownames(ordered_fp_res)
ordered_fp_res
## nfingerprintable_stimuli c1 c2 subids pval fdr
## a8681Py534 0 0 0 a8681Py534 1.000000000 1.00000000
## hvu5113B2r 4 0 4 hvu5113B2r 0.469530470 0.47404519
## j51h15E4xv 7 3 4 j51h15E4xv 0.111888112 0.11406070
## nxaQkud83t 9 4 5 nxaQkud83t 0.049950050 0.05244755
## Wj74c75671 9 2 7 Wj74c75671 0.042957043 0.04602540
## T1358ru144 10 4 7 T1358ru144 0.006993007 0.01234060
## b9u81qz6G5 11 2 9 b9u81qz6G5 0.097902098 0.10078157
## b57665B367 11 3 8 b57665B367 0.030969031 0.03422893
## P9a579it51 11 4 7 P9a579it51 0.061938062 0.06439105
## M3go34rg95 11 4 8 M3go34rg95 0.004995005 0.01234060
## Yjkv6qz9d1 11 1 11 Yjkv6qz9d1 0.016983017 0.01981352
## I89n948112 13 3 12 I89n948112 0.045954046 0.04873914
## o77M6w8m26 13 4 11 o77M6w8m26 0.029970030 0.03347716
## b5F118a654 14 3 12 b5F118a654 0.009990010 0.01380199
## b1l2a37p3Z 14 4 11 b1l2a37p3Z 0.038961039 0.04217432
## aadbq388Ob 15 2 13 aadbq388Ob 0.021978022 0.02508361
## ytbukz3wX4 15 4 11 ytbukz3wX4 0.014985015 0.01808536
## tu2b9S1ms9 15 4 11 tu2b9S1ms9 0.016983017 0.01981352
## u17w5462Em 17 5 12 u17w5462Em 0.010989011 0.01424501
## Fljc85x4j4 17 4 14 Fljc85x4j4 0.000999001 0.01234060
## okj19fo9Fh 17 5 12 okj19fo9Fh 0.004995005 0.01234060
## ewv9511u6J 17 3 14 ewv9511u6J 0.002997003 0.01234060
## x5Ny88741k 17 2 15 x5Ny88741k 0.007992008 0.01234060
## vhs2B895gm 18 3 15 vhs2B895gm 0.007992008 0.01234060
## I28284j41f 18 4 14 I28284j41f 0.003996004 0.01234060
## ggr1delQ9y 18 5 14 ggr1delQ9y 0.012987013 0.01585624
## fgJ6xw7668 18 5 14 fgJ6xw7668 0.007992008 0.01234060
## x75646ye9T 19 4 15 x75646ye9T 0.006993007 0.01234060
## wG7615jxji 19 4 15 wG7615jxji 0.006993007 0.01234060
## y9s1dvTq2q 19 2 17 y9s1dvTq2q 0.002997003 0.01234060
## neCz5h3c81 19 1 18 neCz5h3c81 0.003996004 0.01234060
## c2i1N145o5 19 5 14 c2i1N145o5 0.007992008 0.01234060
## m3837o9wA8 20 3 17 m3837o9wA8 0.004995005 0.01234060
## z4z93Q7hs2 20 4 16 z4z93Q7hs2 0.012987013 0.01585624
## z4wsu3Z4l2 20 3 17 z4wsu3Z4l2 0.010989011 0.01424501
## hy2wC38x85 20 6 14 hy2wC38x85 0.003996004 0.01234060
## xj3R2c13ha 21 3 18 xj3R2c13ha 0.004995005 0.01234060
## ox17xwBqy6 21 7 14 ox17xwBqy6 0.009990010 0.01380199
## j6r171siTa 21 10 11 j6r171siTa 0.001998002 0.01234060
## Zd122o2d96 21 4 17 Zd122o2d96 0.017982018 0.02074848
## p15y46v54P 22 1 21 p15y46v54P 0.002997003 0.01234060
## uw968g1F9o 22 3 19 uw968g1F9o 0.002997003 0.01234060
## s145O1udx1 23 6 18 s145O1udx1 0.031968032 0.03496503
## gJuiz877gn 23 9 15 gJuiz877gn 0.005994006 0.01234060
## S91d18oh3x 23 4 21 S91d18oh3x 0.005994006 0.01234060
## x1J5788214 23 6 18 x1J5788214 0.003996004 0.01234060
## tx43oiW5ps 24 3 21 tx43oiW5ps 0.003996004 0.01234060
## j1218Kcj5g 24 4 21 j1218Kcj5g 0.011988012 0.01535050
## ax5I99lf78 25 2 23 ax5I99lf78 0.002997003 0.01234060
## Opb6k5eke1 25 4 21 Opb6k5eke1 0.024975025 0.02819761
## b83u91R6q5 25 2 23 b83u91R6q5 0.001998002 0.01234060
## dKfyt77oq4 25 3 22 dKfyt77oq4 0.005994006 0.01234060
## jmHk8186fv 25 9 16 jmHk8186fv 0.005994006 0.01234060
## e813o11n7T 26 7 19 e813o11n7T 0.004995005 0.01234060
## M97oq2u9z2 26 2 26 M97oq2u9z2 0.007992008 0.01234060
## p6o2qe1xQt 28 4 24 p6o2qe1xQt 0.008991009 0.01329656
## kju391wN3s 28 9 19 kju391wN3s 0.007992008 0.01234060
## yzfoH84397 30 8 22 yzfoH84397 0.003996004 0.01234060
## jT2r93k9m8 31 7 26 jT2r93k9m8 0.005994006 0.01234060
## uxxiod4U24 31 4 31 uxxiod4U24 0.002997003 0.01234060
## k7ifvzH4yx 31 7 24 k7ifvzH4yx 0.009990010 0.01380199
## tb51gQ2f32 32 9 23 tb51gQ2f32 0.003996004 0.01234060
## qixzE71638 33 8 25 qixzE71638 0.004995005 0.01234060
## b1Oe7158q9 33 7 27 b1Oe7158q9 0.004995005 0.01234060
## Yoea9j7gg5 33 3 30 Yoea9j7gg5 0.007992008 0.01234060
## wUe1ha8l71 35 7 28 wUe1ha8l71 0.004995005 0.01234060
## jbypJkj3a8 35 4 31 jbypJkj3a8 0.010989011 0.01424501
## wfvbs123Wn 36 9 28 wfvbs123Wn 0.010989011 0.01424501
## c192S19qf7 37 14 26 c192S19qf7 0.005994006 0.01234060
## aSzdcei682 37 6 31 aSzdcei682 0.009990010 0.01380199
## q241I76nd4 37 10 27 q241I76nd4 0.001998002 0.01234060
## hB3nx98zae 38 6 32 hB3nx98zae 0.007992008 0.01234060
## a34K18e29d 39 5 34 a34K18e29d 0.009990010 0.01380199
## ddf1bWbguf 40 10 31 ddf1bWbguf 0.003996004 0.01234060
## Z8m99686m1 42 8 37 Z8m99686m1 0.005994006 0.01234060
## Valj2s14g8 42 9 33 Valj2s14g8 0.006993007 0.01234060
## yr8lL9pd54 42 9 33 yr8lL9pd54 0.005994006 0.01234060
## P736124up6 43 6 38 P736124up6 0.001998002 0.01234060
## wc8b25ckoY 43 12 31 wc8b25ckoY 0.007992008 0.01234060
## y779g7Rjjm 44 8 36 y779g7Rjjm 0.006993007 0.01234060
## u44Dr4gs86 44 8 36 u44Dr4gs86 0.004995005 0.01234060
## nI83m4382t 44 6 43 nI83m4382t 0.000999001 0.01234060
## y7g3x8266T 46 9 38 y7g3x8266T 0.007992008 0.01234060
## G62a6q96v1 46 10 37 G62a6q96v1 0.004995005 0.01234060
## yc3hB2f247 47 9 38 yc3hB2f247 0.003996004 0.01234060
## tn95dVvyfq 48 11 37 tn95dVvyfq 0.012987013 0.01585624
## i34257K5o6 49 10 39 i34257K5o6 0.004995005 0.01234060
## r76lKaa359 49 9 40 r76lKaa359 0.004995005 0.01234060
## f59k8Ac558 50 10 41 f59k8Ac558 0.001998002 0.01234060
## v26h929uK8 52 11 41 v26h929uK8 0.006993007 0.01234060
## N8hwg5979i 52 12 40 N8hwg5979i 0.004995005 0.01234060
## gc7955R7k8 52 11 41 gc7955R7k8 0.002997003 0.01234060
## nJ334736dc 53 12 41 nJ334736dc 0.007992008 0.01234060
## e1n58k41tM 54 7 47 e1n58k41tM 0.005994006 0.01234060
## niz32f6icK 54 11 47 niz32f6icK 0.007992008 0.01234060
## Rbu59l2m2e 55 13 43 Rbu59l2m2e 0.010989011 0.01424501
## hli575Zm3a 60 12 49 hli575Zm3a 0.008991009 0.01329656
## j79z6Cf128 60 11 54 j79z6Cf128 0.008991009 0.01329656
## M19lz3o828 62 16 46 M19lz3o828 0.016983017 0.01981352
## eij9G9b2l2 63 15 48 eij9G9b2l2 0.012987013 0.01585624
## Zh77t5831g 63 9 54 Zh77t5831g 0.005994006 0.01234060
## Rw77rshc7y 67 8 59 Rw77rshc7y 0.006993007 0.01234060
## q7u25p4rM7 74 7 67 q7u25p4rM7 0.002997003 0.01234060
## tqH9bkdr78 82 11 71 tqH9bkdr78 0.005994006 0.01234060
## Xa376ga9a3 85 12 73 Xa376ga9a3 0.007992008 0.01234060
## Fingerprintable
## a8681Py534 No
## hvu5113B2r No
## j51h15E4xv No
## nxaQkud83t No
## Wj74c75671 Yes
## T1358ru144 Yes
## b9u81qz6G5 No
## b57665B367 Yes
## P9a579it51 No
## M3go34rg95 Yes
## Yjkv6qz9d1 Yes
## I89n948112 Yes
## o77M6w8m26 Yes
## b5F118a654 Yes
## b1l2a37p3Z Yes
## aadbq388Ob Yes
## ytbukz3wX4 Yes
## tu2b9S1ms9 Yes
## u17w5462Em Yes
## Fljc85x4j4 Yes
## okj19fo9Fh Yes
## ewv9511u6J Yes
## x5Ny88741k Yes
## vhs2B895gm Yes
## I28284j41f Yes
## ggr1delQ9y Yes
## fgJ6xw7668 Yes
## x75646ye9T Yes
## wG7615jxji Yes
## y9s1dvTq2q Yes
## neCz5h3c81 Yes
## c2i1N145o5 Yes
## m3837o9wA8 Yes
## z4z93Q7hs2 Yes
## z4wsu3Z4l2 Yes
## hy2wC38x85 Yes
## xj3R2c13ha Yes
## ox17xwBqy6 Yes
## j6r171siTa Yes
## Zd122o2d96 Yes
## p15y46v54P Yes
## uw968g1F9o Yes
## s145O1udx1 Yes
## gJuiz877gn Yes
## S91d18oh3x Yes
## x1J5788214 Yes
## tx43oiW5ps Yes
## j1218Kcj5g Yes
## ax5I99lf78 Yes
## Opb6k5eke1 Yes
## b83u91R6q5 Yes
## dKfyt77oq4 Yes
## jmHk8186fv Yes
## e813o11n7T Yes
## M97oq2u9z2 Yes
## p6o2qe1xQt Yes
## kju391wN3s Yes
## yzfoH84397 Yes
## jT2r93k9m8 Yes
## uxxiod4U24 Yes
## k7ifvzH4yx Yes
## tb51gQ2f32 Yes
## qixzE71638 Yes
## b1Oe7158q9 Yes
## Yoea9j7gg5 Yes
## wUe1ha8l71 Yes
## jbypJkj3a8 Yes
## wfvbs123Wn Yes
## c192S19qf7 Yes
## aSzdcei682 Yes
## q241I76nd4 Yes
## hB3nx98zae Yes
## a34K18e29d Yes
## ddf1bWbguf Yes
## Z8m99686m1 Yes
## Valj2s14g8 Yes
## yr8lL9pd54 Yes
## P736124up6 Yes
## wc8b25ckoY Yes
## y779g7Rjjm Yes
## u44Dr4gs86 Yes
## nI83m4382t Yes
## y7g3x8266T Yes
## G62a6q96v1 Yes
## yc3hB2f247 Yes
## tn95dVvyfq Yes
## i34257K5o6 Yes
## r76lKaa359 Yes
## f59k8Ac558 Yes
## v26h929uK8 Yes
## N8hwg5979i Yes
## gc7955R7k8 Yes
## nJ334736dc Yes
## e1n58k41tM Yes
## niz32f6icK Yes
## Rbu59l2m2e Yes
## hli575Zm3a Yes
## j79z6Cf128 Yes
## M19lz3o828 Yes
## eij9G9b2l2 Yes
## Zh77t5831g Yes
## Rw77rshc7y Yes
## q7u25p4rM7 Yes
## tqH9bkdr78 Yes
## Xa376ga9a3 Yes
# Order subjects identically in the observed and permutation (null) results so
# the ridgelines and the overlaid markers share the same y-axis ordering.
fp_res$subids = factor(fp_res$subids, levels = ordered_fp_res_subs)
fp_perm_res$subids = rownames(fp_res)
fp_perm_res$subids = factor(fp_perm_res$subids, levels = ordered_fp_res_subs)
# melt perm data frame into long format for plotting
melted_fp_perm_res = melt(fp_perm_res, id.vars = c("subids","Fingerprintable"))
# plot aesthetics
markerSize = 7
markerColor = "blue"
fontSize = 10
# ridgeline densities of the permutation null distribution, one per subject
p = ggplot(melted_fp_perm_res, aes(x = value, y = subids, fill=Fingerprintable)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  xlab("# of Fingerprintable Stimuli") + ggtitle("# Fingerprintable Stimuli")
# overlay a star at each subject's observed number of fingerprintable stimuli;
# guides(... = "none") replaces the deprecated guides(... = FALSE) form
p = p + geom_text(data = fp_res, aes(x = nfingerprintable_stimuli, y=subids), color=markerColor, label="*",size=markerSize) +
  guides(color="none", alpha="none") + ggtitle(plot_title_name) + easy_center_title() +
  theme(axis.text.x = element_text(size=fontSize),
        axis.text.y = element_text(size=fontSize),
        axis.title.x = element_text(size=fontSize),
        strip.text.x = element_text(size=fontSize),
        axis.title.y = element_text(size=fontSize),
        plot.title = element_text(hjust = 0.5))
ggsave(p,filename = file.path(plot_path, "number_fingerprintable_stimuli_per_subject.pdf"),
       width=6,height=7)
p
# make ggridges plot for gaze uniqueness index (GUI; aka fingerprint ratio)
# Test each subject for whether GUI is greater than the null value of 0 after
# log10 transformation (because GUI is positively skewed). This identifies
# subjects whose overall GUI across all stimuli is greater than the null value.
# Exclude subjects missing >15% of stimuli (use nstimuli from the setup chunk
# instead of the hard-coded 700).
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
tmp_gui = fpratio_sub_by_stim %>% filter(!na_mask)
tmp_gui = t(tmp_gui)  # transpose to stimuli BY subjects
# one row per retained subject: t-statistic, p-value, FDR-adjusted p
gui_res_sub = data.frame(matrix(nrow = dim(tmp_gui)[2], ncol = 4))
colnames(gui_res_sub) = c("subid","tstat","pval","fdr")
rownames(gui_res_sub) = colnames(tmp_gui)
subids2use = colnames(tmp_gui)
for (sub in subids2use){
  gui_res_sub[sub, "subid"] = sub
  # one-sample t-test of log10(GUI) against 0 (i.e., GUI against 1)
  tres = t.test(log10(tmp_gui[,sub]), mu = 0)
  gui_res_sub[sub, "tstat"] = tres$statistic[[1]]
  gui_res_sub[sub, "pval"] = tres$p.value[[1]]
}
gui_res_sub$fdr = p.adjust(gui_res_sub$pval, method = "fdr")
gui_res_sub$Unique = "Yes"
# anything with tstat<0 or fdr above the threshold is not unique
# (use the fdr_thresh constant defined in the setup chunk)
gui_res_sub$Unique[gui_res_sub$tstat<0 | gui_res_sub$fdr>fdr_thresh] = "No"
gui_res_sub = gui_res_sub[order(gui_res_sub$tstat),]
gui_res_sub$subid = factor(gui_res_sub$subid, levels = gui_res_sub$subid)
table(gui_res_sub$Unique)
##
## No Yes
## 21 84
# rank subjects by descending t-statistic (strongest gaze uniqueness first)
gui_res_sub = gui_res_sub[order(-gui_res_sub$tstat),]
gui_res_sub$uniqueness_rank = c(1:dim(gui_res_sub)[1]) # make gaze uniqueness ranking based on strength of tstat
# per-stimulus GUI values for the retained (low-NA) subjects
gui_res = fpratio_sub_by_stim %>% filter(!na_mask)
gui_res$subid = rownames(gui_res)
# reversed levels so the top-ranked subject appears at the top of the ridgeline plot
gui_res$subid = factor(gui_res$subid, levels = rev(gui_res_sub$subid))
gui_res = merge(gui_res, gui_res_sub[,c("subid","Unique")])
# long format: one row per subject x stimulus GUI value
melted_gui_res = melt(gui_res, id.vars = c("subid","Unique"))
# make ridges plot, with a vertical reference line at GUI = 1
p = ggplot(melted_gui_res, aes(x = value, y = subid, fill=Unique)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + xlim(-1,7) + geom_vline(xintercept = 1) +
xlab("Gaze Uniqueness Index (GUI)") + ggtitle(plot_title_name) + easy_center_title()
ggsave(p,filename = file.path(plot_path, "gaze_uniqueness_index_per_subject.pdf"),
width=6,height=7)
p
# subjects to remove because of too many NAs (>15% of stimuli missing);
# nstimuli comes from the setup chunk (was hard-coded 700)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
# compute jaccard distance matrix which is percentage of overlap in fingerprintable stimuli
jaccard_mat = as.matrix(dist(as.matrix(classifier_output_sub_by_stim), method = "binary"))
# drop excluded subjects. Logical indexing (!na_mask) is safe even when no
# subject is excluded; the original -which(na_mask) would select ZERO rows
# when which() returns integer(0).
jaccard_mat = jaccard_mat[!na_mask, !na_mask]
# NOTE(review): clust_method is assigned further down in this script; it must
# already exist in the session for these heatmap calls to work -- confirm
heatmap(as.matrix(jaccard_mat),
        hclustfun=function(d) hclust(d, method=clust_method),
        scale = "none")
# plot heatmap of classifier output matrix of subjects by stimuli matrix
tmp_data = classifier_output_sub_by_stim[!na_mask, ]
heatmap(as.matrix(tmp_data),
        hclustfun=function(d) hclust(d, method=clust_method),
        scale = "none")
# estimate the best number of clusters by silhouette on binary distance
nb_clust_res = NbClust(data = as.matrix(tmp_data),index = "silhouette", distance = "binary",method = "ward.D2", min.nc=1,max.nc=dim(tmp_data)[1]-1)
nb_clust_res$Best.nc
## Number_clusters Value_Index
## 104.0000 0.9817
# Per-subject median fingerprint ratio within each semantic feature category,
# plus overall ("all") and within each data-driven stimulus cluster ("c1","c2").
df_sem_feat = data.frame(df_sem_feat)
sem_feat_names = colnames(df_sem_feat)
nsubs = dim(fpratio_sub_by_stim)[1]
mean_fpr_per_sem_feat_res = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
rownames(mean_fpr_per_sem_feat_res) = rownames(fpratio_sub_by_stim)
colnames(mean_fpr_per_sem_feat_res) = sem_feat_names
mean_fpr_per_sem_feat_res$all = NA
# seq_len/seq_along instead of 1:n (safe when the length is 0)
for (isub in seq_len(nsubs)){
  for (isf in seq_along(sem_feat_names)){
    # median (not mean) fingerprint ratio over stimuli tagged with this feature
    mean_fpr_per_sem_feat_res[isub, isf] = median(as.numeric(fpratio_sub_by_stim[isub,df_sem_feat[,isf]==1]), na.rm = TRUE)
  } # for (isf in seq_along(sem_feat_names)){
  # median over all stimuli, and within each stimulus cluster
  mean_fpr_per_sem_feat_res[isub, "all"] = median(as.numeric(fpratio_sub_by_stim[isub,]), na.rm = TRUE)
  mean_fpr_per_sem_feat_res[isub, "c1"] = median(as.numeric(fpratio_sub_by_stim[isub,cluster_res[,"clust_labels"]==1]), na.rm = TRUE)
  mean_fpr_per_sem_feat_res[isub, "c2"] = median(as.numeric(fpratio_sub_by_stim[isub,cluster_res[,"clust_labels"]==2]), na.rm = TRUE)
} # for (isub in seq_len(nsubs)){
# Enrichment analysis: for each fingerprintable subject, test whether their
# fingerprintable stimuli are enriched for each semantic feature category.
df_res = classifier_output_sub_by_stim
# remove subs that don't have statistically significant nfingerprintable stimuli
df_res = df_res[fp_subs2include,]
nsubs = dim(df_res)[1]
nTotal = nstimuli
# odds ratios, subjects BY features
enrich_res_OR = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
colnames(enrich_res_OR) = sem_feat_names
rownames(enrich_res_OR) = rownames(df_res)
# p-values, subjects BY features
enrich_res_P = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
colnames(enrich_res_P) = sem_feat_names
rownames(enrich_res_P) = rownames(df_res)
# number of fingerprintable stimuli per subject
nfp_res = data.frame(matrix(nrow = nsubs, ncol=1))
rownames(nfp_res) = rownames(df_res)
colnames(nfp_res) = c("nFP")
# seq_len/seq_along instead of 1:n (safe when the length is 0)
for (isub in seq_len(nsubs)){
  nFP = sum(df_res[isub,])
  nfp_res[isub,1] = nFP
  fp_mask = df_res[isub,]==1
  for (isf in seq_along(sem_feat_names)){
    semfeat2use = sem_feat_names[isf]
    nSemantic = sum(df_sem_feat[,semfeat2use])
    nFP_given_Semantic = sum(df_sem_feat[fp_mask,semfeat2use])
    # enrichmentTest is presumably defined in the sourced utils.R -- confirm
    tmp_enrich_res = enrichmentTest(nFP_given_Semantic, nFP, nSemantic, nTotal)
    enrich_res_OR[isub,semfeat2use] = tmp_enrich_res$OR
    enrich_res_P[isub,semfeat2use] = tmp_enrich_res$p
  } # for (isf in seq_along(sem_feat_names)){
} # for (isub in seq_len(nsubs)){
# Cluster subjects (rows) and semantic features (columns) of the enrichment
# OR matrix, using a consensus over many NbClust indices to pick k.
clust_method = "ward.D2"
nbclust_index = c("kl","ch","ccc","cindex","db","silhouette","duda","pseudot2",
"ratkowsky","ptbiserial","gap","mcclain","gamma","gplus","tau",
"sdindex")
res = NbClust(data = enrich_res_OR, method = clust_method, index = nbclust_index)
# tally each index's preferred k; best_nk = the modal (most-voted) k
a = data.frame(res$Best.nc[1,]); colnames(a) = "k"; a$index = rownames(a)
best_nk = as.numeric(names(table(a$k))[table(a$k)==max(table(a$k))])
# rerun with a single index ("ch") to get the final row partition
res = NbClust(data = enrich_res_OR, method = clust_method, index = "ch")
row_subtype = res$Best.partition
row_cols = brewer.pal(9, "Set1")[res$Best.partition]
# repeat the consensus procedure on the transposed matrix to cluster features
# (note: "ccc" is omitted from this index list)
nbclust_index = c("kl","ch","cindex","db","silhouette","duda","pseudot2",
"ratkowsky","ptbiserial","gap","mcclain","gamma","gplus","tau",
"sdindex")
res = NbClust(data = t(enrich_res_OR), method = clust_method, index=nbclust_index)
a = data.frame(res$Best.nc[1,]); colnames(a) = "k"; a$index = rownames(a)
best_nk = as.numeric(names(table(a$k))[table(a$k)==max(table(a$k))])
# final column partition uses the "cindex" index
res = NbClust(data = t(enrich_res_OR), method = clust_method, index="cindex")
col_cols = brewer.pal(9, "Set1")[res$Best.partition]
feature_clusters = data.frame(feature = colnames(enrich_res_OR), cluster = col_cols)
# recolor the two row clusters and two column clusters with ggplot hues
# (get_ggColorHue is presumably defined in the sourced utils.R -- confirm)
new_row_cols = get_ggColorHue(6)
tmp_row_cols = unique(row_cols)
row_cols[row_cols==tmp_row_cols[1]] = new_row_cols[3]
row_cols[row_cols==tmp_row_cols[2]] = new_row_cols[4]
tmp_col_cols = unique(col_cols)
col_cols[col_cols==tmp_col_cols[1]] = new_row_cols[5]
col_cols[col_cols==tmp_col_cols[2]] = new_row_cols[6]
# heatmap of ORs with cluster color annotations on rows and columns
heatmap(as.matrix(enrich_res_OR),
hclustfun=function(d) hclust(d, method=clust_method),
scale = "none",
RowSideColors=row_cols,
ColSideColors=col_cols)
# plot this so you can screenshot the color scale
ggheatmap(as.matrix(enrich_res_OR), color=colorRampPalette(hcl.colors(12, "YlOrRd", rev = TRUE))(100), scale="none",legendName="OR")
# subject-to-subtype assignment from the row partition
subtype_df = data.frame(subid = rownames(enrich_res_OR), subtype = row_subtype)
table(subtype_df$subtype)
##
## 1 2
## 64 35
# append subject IDs and merge subtype labels onto the enrichment OR table
enrich_res_OR$subid = rownames(enrich_res_OR)
df2plot = merge(enrich_res_OR, subtype_df, by = "subid")
rownames(df2plot) = df2plot$subid
df2plot$subtype = factor(df2plot$subtype)
# long format for plotting: one row per subject x feature
df4plot = melt(df2plot, id.vars = c("subid","subtype"))
# per-subtype one-sample t-tests of enrichment ORs against the null OR of 1
h_test_res = list()
res_colnames = c("feature","t","p","d")
for (subtype in unique(row_subtype)){
subtype_name = sprintf("S%d",subtype)
# preallocate a features x (feature, t, p, d) results table for this subtype
h_test_res[[subtype_name]] = data.frame(matrix(
nrow = length(sem_feat_names),
ncol = length(res_colnames)))
rownames(h_test_res[[subtype_name]]) = sem_feat_names
colnames(h_test_res[[subtype_name]]) = res_colnames
}
for (subtype in unique(row_subtype)){
subtype_name = sprintf("S%d",subtype)
for (sem_feat in sem_feat_names){
h_test_res[[subtype_name]][sem_feat, "feature"] = sem_feat
# test whether this subtype's ORs for this feature differ from 1
t_res = t.test(df2plot[df2plot$subtype==subtype,sem_feat], mu=1)
h_test_res[[subtype_name]][sem_feat, "t"] = t_res$statistic
h_test_res[[subtype_name]][sem_feat, "p"] = t_res$p.value
# standardized effect size relative to the null OR of 1: (mean - 1) / sd
tmp_mu = mean(df2plot[df2plot$subtype==subtype,sem_feat], na.rm = TRUE)
tmp_sd = sd(df2plot[df2plot$subtype==subtype,sem_feat], na.rm = TRUE)
h_test_res[[subtype_name]][sem_feat, "d"] = (tmp_mu-1)/tmp_sd
}
# FDR-correct p-values across features within this subtype
h_test_res[[subtype_name]]$fdr = p.adjust(h_test_res[[subtype_name]]$p, method = "fdr")
}
# check if subtypes differ by sex
a = merge(pheno_data,df2plot[,c("subid","subtype")], by.x = "subj_ID",by.y = "subid")
chisq.test(table(a$sex, a$subtype))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(a$sex, a$subtype)
## X-squared = 0.049762, df = 1, p-value = 0.8235
# NOTE(review): reorder_vect is computed but never used below; the feature
# ordering is hard-coded in ordered_sem_feat_by_t instead -- confirm intended
reorder_vect = order(h_test_res[["S2"]][,"t"])
ordered_sem_feat_by_t = c("gazed","touched","operability","touch","animal",
"sound","smell","nonsocial","taste","text","watchability",
"social","face","human","emotion","motion")
# print each subtype's hypothesis-test table, rows ordered by the list above
for (subtype in unique(row_subtype)){
subtype_name = sprintf("S%d",subtype)
print(subtype_name)
print(h_test_res[[subtype_name]][ordered_sem_feat_by_t,])
}
## [1] "S1"
## feature t p d fdr
## gazed gazed -0.6901391 0.4926413423 -0.08626739 0.606327806
## touched touched -0.0157684 0.9874689880 -0.00197105 0.987468988
## operability operability 1.7295878 0.0886010506 0.21619847 0.202516687
## touch touch 0.1247483 0.9011200362 0.01559354 0.961194705
## animal animal 0.4900551 0.6257972088 0.06125689 0.715196810
## sound sound 1.3624726 0.1778998357 0.17030908 0.355799671
## smell smell 2.8522034 0.0058653541 0.35652543 0.031281889
## nonsocial nonsocial 2.2086022 0.0308482332 0.27607527 0.082261955
## taste taste 3.4190864 0.0011067413 0.42738580 0.008853930
## text text 2.5570253 0.0129779816 0.31962817 0.051911926
## watchability watchability 2.4290659 0.0180022380 0.30363324 0.057607162
## social social -1.0989039 0.2759918479 -0.13736299 0.401442688
## face face -1.2942636 0.2002986308 -0.16178295 0.356065840
## human human -0.7078459 0.4816515429 -0.08848074 0.606327806
## emotion emotion 1.2319635 0.2225411500 0.15399544 0.356065840
## motion motion -4.0487783 0.0001433886 -0.50609729 0.002294218
## [1] "S2"
## feature t p d fdr
## gazed gazed 3.9701013 3.527683e-04 0.6710696 8.063277e-04
## touched touched 4.9799653 1.823580e-05 0.8417678 4.862881e-05
## operability operability -3.6160523 9.582779e-04 -0.6112244 1.533245e-03
## touch touch -3.3993653 1.739519e-03 -0.5745976 2.530209e-03
## animal animal 1.1630151 2.529213e-01 0.1965854 2.697827e-01
## sound sound 1.2744010 2.111630e-01 0.2154131 2.413292e-01
## smell smell -5.9557618 9.829425e-07 -1.0067075 3.145416e-06
## nonsocial nonsocial -11.7800774 1.499182e-13 -1.9911965 2.398692e-12
## taste taste -3.3139543 2.192178e-03 -0.5601605 2.922904e-03
## text text 1.8480581 7.330821e-02 0.3123788 9.022549e-02
## watchability watchability 0.8382835 4.077263e-01 0.1416958 4.077263e-01
## social social 8.2258883 1.345073e-09 1.3904289 7.173721e-09
## face face 6.2060659 4.657501e-07 1.0490166 1.863001e-06
## human human 8.3173376 1.042847e-09 1.4058867 7.173721e-09
## emotion emotion 3.6212376 9.445559e-04 0.6121009 1.533245e-03
## motion motion 3.6301242 9.214799e-04 0.6136030 1.533245e-03
# Order features by the S2 t-statistics and rename melted columns for plotting
df4plot$variable = factor(df4plot$variable, levels = ordered_sem_feat_by_t)
colnames(df4plot)[colnames(df4plot)=="variable"] = "feature"
colnames(df4plot)[colnames(df4plot)=="value"] = "OR"
feature_clusters$feature = factor(feature_clusters$feature, levels = levels(df4plot$feature))
# attach each feature's cluster assignment for coloring
df4plot$feature_cluster = NA
for (ifeat in seq_along(sem_feat_names)){
  feature_name2use = sem_feat_names[ifeat]
  cluster2use = feature_clusters[feature_clusters$feature==feature_name2use, "cluster"]
  df4plot$feature_cluster[df4plot$feature==feature_name2use] = cluster2use
}
df4plot$feature_cluster = factor(df4plot$feature_cluster)
fingerprint_profiles = df4plot
# scatterboxes of enrichment ORs per feature, faceted by subtype, with a
# reference line at OR = 1. guides(colour = "none") replaces the deprecated
# guides(colour = FALSE) form.
p = ggplot(data = df4plot, aes(x = feature, y = OR, colour=feature_cluster)) +
  facet_grid(subtype ~ .) +
  geom_scatterbox() +
  geom_hline(yintercept=1) +
  xlab("Semantic Features") +
  ylab("Odds Ratio") +
  scale_colour_manual(values = c("#F564E3","#619CFF")) +
  guides(colour="none") +
  coord_flip() + ggtitle(plot_title_name) + easy_center_title()
# pass p explicitly: without it ggsave() saves the last *displayed* plot,
# which at this point in the script is not p
ggsave(p, filename = file.path(plot_path, "fingerprintable_stimuli_semantic_enrichment.pdf"),
       width = 4, height = 5)
p
# Rename the phenotype ID column to match df2plot, join phenotype data onto
# the subtype table, and build a readable subtype label ("S1","S2",...; "NF"
# for subjects with no subtype assignment).
colnames(pheno_data)[colnames(pheno_data)=="subj_ID"] = "subid"
df2use = right_join(df2plot, pheno_data, by="subid")
df2use$subtype2 = "NF"
unique_subtypes = levels(df2use$subtype)
for (lvl in unique_subtypes){
  hits = df2use$subtype==lvl
  df2use$subtype2[hits] = sprintf("S%s", lvl)
}
# bring in the per-subject fingerprinting results
df2use = merge(df2use, fp_res, by.x = "subid", by.y = "subids")
# subjects to remove because of too many NAs (>15% of the 700 stimuli)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(700*0.15)
mean_fpr_per_sem_feat_res = mean_fpr_per_sem_feat_res %>% filter(!na_mask)
mean_fpr_per_sem_feat_res$subid = rownames(mean_fpr_per_sem_feat_res)
new_df = merge(mean_fpr_per_sem_feat_res, pheno_data, by = "subid")
# PCA of the two autistic-traits questionnaires (AQ and SRS totals)
pca_res = princomp(scale(new_df[,c("AQ_Tot","SRS_Tot_Raw")]))
summary(pca_res)
## Importance of components:
##                           Comp.1    Comp.2
## Standard deviation     1.3269348 0.4692510
## Proportion of Variance 0.8888431 0.1111569
## Cumulative Proportion  0.8888431 1.0000000
# first component = shared autistic-traits dimension
new_df$at_pc1 = pca_res$scores[,1]
new_df = merge(new_df, gui_res_sub, by = "subid")
new_df = merge(new_df, df2use[,c("subid","nfingerprintable_stimuli")])
# compute median intra-subject similarity
# (rowMedians is presumably provided by one of the loaded packages -- confirm)
a = data.frame(med_intrasub=rowMedians(as.matrix(intrasubR_sub_by_stim), na.rm=TRUE), subid=rownames(intrasubR_sub_by_stim))
a = a %>% filter(!na_mask)
new_df = merge(new_df, a, by ="subid")
# compute median avg inter-subject similarity
a = data.frame(med_intersub=rowMedians(as.matrix(mean_intersubR_sub_by_stim), na.rm=TRUE), subid=rownames(mean_intersubR_sub_by_stim))
a = a %>% filter(!na_mask)
new_df = merge(new_df, a, by ="subid")
# global gui is median intra-sub divided by median avg inter-sub
new_df$global_gui = new_df$med_intrasub/new_df$med_intersub
# PCA gaze fingerprinting metrics (median GUI "all" and n fingerprintable)
pca_res = princomp(scale(new_df[,c("all","nfingerprintable_stimuli")]))
summary(pca_res)
## Importance of components:
##                           Comp.1     Comp.2
## Standard deviation     1.3360906 0.44250898
## Proportion of Variance 0.9011515 0.09884851
## Cumulative Proportion  0.9011515 1.00000000
# first two components = gaze fingerprinting dimensions used as predictors below
new_df$fp_pc1 = pca_res$scores[,1]
new_df$fp_pc2 = pca_res$scores[,2]
# define who is an autistic traits outlier by either SRS or AQ
new_df$AQ_outlier = "No"
new_df$AQ_outlier[new_df$AQ_Tot>26] = "Yes" # Woodbury-Smith et al., 2005 JADD cut-off
new_df$SRS_outlier = "No"
new_df$SRS_outlier[new_df$SRS_Tot_Raw>80.81] = "Yes" # 2 SD cut-off for TD Italian adults from SRS-2 manual
# logical flag: subject exceeds either questionnaire cut-off
new_df$AT_outlier = new_df$SRS_outlier=="Yes" | new_df$AQ_outlier=="Yes"
# test correlation between AQ and SRS
cor.test(new_df$AQ_Tot, new_df$SRS_Tot_Raw)
##
## Pearson's product-moment correlation
##
## data: new_df$AQ_Tot and new_df$SRS_Tot_Raw
## t = 12.555, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6886711 0.8436072
## sample estimates:
## cor
## 0.7776862
# Scatterplot of AQ vs SRS totals, points colored by autistic-traits outlier
# status, with a robust regression fit (lmrob). guides(colour = "none")
# replaces the deprecated guides(colour = FALSE) form.
p = ggplot(data = new_df, aes(x = AQ_Tot, y = SRS_Tot_Raw)) +
  geom_point(data = new_df, aes(x = AQ_Tot, y = SRS_Tot_Raw, colour = AT_outlier)) +
  geom_smooth(method = lmrob, colour="black") + guides(colour="none") +
  xlab("AQ Total") + ylab("SRS Total")
ggsave(p, filename = file.path(plot_path, "AQ_by_SRS.pdf"))
p
# test correlation between number of fingerprintable stimuli and median GUI
# ("all" = per-subject median GUI across all stimuli, computed earlier)
cor.test(new_df$nfingerprintable_stimuli, new_df$all)
##
## Pearson's product-moment correlation
##
## data: new_df$nfingerprintable_stimuli and new_df$all
## t = 13.641, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7216009 0.8614933
## sample estimates:
## cor
## 0.802303
# Scatterplot of n fingerprintable stimuli vs median GUI with a robust fit.
# guides(colour = "none") replaces the deprecated guides(colour = FALSE) form.
p = ggplot(data = new_df, aes(x = nfingerprintable_stimuli, y = all)) +
  geom_point() +
  geom_smooth(method = lmrob, colour="black") + guides(colour="none") +
  xlab("Fingerprintable Stimuli") + ylab("Median Gaze Uniqueness Index (GUI)")
ggsave(p, filename = file.path(plot_path, "nfingerprintable_stimuli_by_medianGUI.pdf"))
p
# run linear model with sex and FP PC1 and PC2 predicting autistic traits PC1
# (robust linear regression via lmrob from robustbase)
form2use = as.formula(sprintf("at_pc1 ~ sex + %s + %s","fp_pc1","fp_pc2"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3817752 0.16382788 -2.330343 2.177677e-02
## sexM 0.4328317 0.20074730 2.156102 3.344986e-02
## fp_pc1 -0.3578814 0.06787054 -5.273000 7.658472e-07
## fp_pc2 -0.3224181 0.20831048 -1.547777 1.248038e-01
summary(mod2use)$r.squared
## [1] 0.2329668
summary(mod2use)$adj.r.squared
## [1] 0.2101836
# model comparisons - gaze fingerprinting PC1, median intra-sub, median inter-sub
# (variance explained by each of three competing predictors, collected below)
model_variances = data.frame(matrix(nrow = 3, ncol = 2))
rownames(model_variances) = c("Gaze Fingerprinting PC1","Median Intra-Subject Similarity","Median Inter-Subject Similarity")
colnames(model_variances) = c("Model","Percentage Variance Explained")
# run linear model with sex and FP PC1 predicting autistic traits PC1
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","fp_pc1"))
model1 = lmrob(data = new_df, formula = form2use)
summary(model1)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4113238 0.1679866 -2.448552 1.604928e-02
## sexM 0.4834853 0.2021414 2.391817 1.859694e-02
## fp_pc1 -0.3556895 0.0696950 -5.103515 1.549848e-06
summary(model1)$r.squared
## [1] 0.212732
# store this model's R^2 in the comparison table
model_variances["Gaze Fingerprinting PC1","Model"] = "Gaze Fingerprinting PC1"
model_variances["Gaze Fingerprinting PC1","Percentage Variance Explained"] = summary(model1)$r.squared
summary(model1)$adj.r.squared
## [1] 0.1972954
# run linear model with sex and median intrasub-similarity
# (competing model 2: does raw intra-subject similarity predict traits?)
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","med_intrasub"))
model2 = lmrob(data = new_df, formula = form2use)
summary(model2)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7593069 0.8281129 2.124477 0.036047008
## sexM 0.4429568 0.2136416 2.073364 0.040658312
## med_intrasub -3.8299968 1.4477434 -2.645494 0.009448267
summary(model2)$r.squared
## [1] 0.1167606
# store this model's R^2 in the comparison table
model_variances["Median Intra-Subject Similarity","Model"] = "Median Intra-Subject Similarity"
model_variances["Median Intra-Subject Similarity","Percentage Variance Explained"] = summary(model2)$r.squared
summary(model2)$adj.r.squared
## [1] 0.09944221
# run linear model with sex and median avg intersub-similarity
# (competing model 3: does average inter-subject similarity predict traits?)
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","med_intersub"))
model3 = lmrob(data = new_df, formula = form2use)
summary(model3)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.30722743 1.306156 -0.23521494 0.81451349
## sexM 0.38589817 0.223247 1.72857046 0.08691185
## med_intersub -0.09931571 2.951955 -0.03364405 0.97322674
summary(model3)$r.squared
## [1] 0.02961302
# store this model's R^2 in the comparison table
model_variances["Median Inter-Subject Similarity","Model"] = "Median Inter-Subject Similarity"
model_variances["Median Inter-Subject Similarity","Percentage Variance Explained"] = summary(model3)$r.squared
summary(model3)$adj.r.squared
## [1] 0.01058582
# Scatterplot of autistic traits PC1 against the chosen fingerprinting metric.
feature2use = "fp_pc1"
p = ggplot(data = new_df, aes(x = .data[[feature2use]], y = at_pc1)) +
  # use feature2use here too: the original hard-coded fp_pc1, which would
  # silently diverge from the main aes if feature2use were changed.
  # guides(colour = "none") replaces the deprecated guides(colour = FALSE).
  geom_point(data = new_df, aes(x = .data[[feature2use]], y = at_pc1, colour = AT_outlier)) +
  geom_smooth(method = lmrob, colour="black") + guides(colour="none") +
  ylab("Autistic Traits PC1") + xlab("Gaze Fingerprinting PC1")
ggsave(p, filename = file.path(plot_path, "autistic_traits_PC1_by_fingerprinting_PC1.pdf"))
p
# Horizontal bar chart comparing variance explained across the three models.
x_var = "Percentage Variance Explained"
y_var = "Model"
# reverse the level order so the strongest model plots at the top
model_levels = c("Gaze Fingerprinting PC1",
                 "Median Intra-Subject Similarity",
                 "Median Inter-Subject Similarity")
model_variances$Model = factor(model_variances$Model, levels = rev(model_levels))
p = ggplot(data = model_variances, aes(x = .data[[x_var]], y = .data[[y_var]])) +
  geom_bar(stat = "identity") +
  xlab("Percentage Variance Explained in Autistic Traits PC1")
ggsave(p, filename = file.path(plot_path, "autistic_traits_model_comparisons.pdf"))
p